###############################################################################
#                                                                             #
#                   WHEN CUES COLLIDE: PARTISAN SIGNALS AND THE               #
#                         DYNAMICS OF ETHNIC VOTING IN                        #
#                         NONPARTISAN LOCAL ELECTIONS                         #
#                               Replication Code                              #                        
#                               E. Grant Baldwin                              #
#                             Last Updated: 01/6/26                           #
#                                                                             #
###############################################################################

# =========================================================================== #
#                   PREAMBLE (LOADING NECESSARY PACKAGES)                     #
# =========================================================================== #

pacman::p_load(tidyverse, openxlsx, sf, pdftools, ggplot2, patchwork,
               fixest, lmtest, knitr, kableExtra, modelsummary,
               ggrepel, haven, UpSetR, marginaleffects)

# Enter your working directory here, e.g. setwd("path/to/replication/folder").
# The data file and shapefiles read below are referenced with relative paths,
# so they are resolved against this directory.
# NOTE: setwd() called with no argument raises an error (the 'dir' argument
# is missing), so the script stops here until a path is filled in.
setwd()

# =========================================================================== #
#                               LOAD DATA                                     #
# =========================================================================== #

# Read the replication data and keep only rows with match_flag == 1 that come
# from contests with at least one Latino candidate (n_hisp_candidates >= 1).
dat <- filter(
  read_csv('cues_collide_data.csv'),
  match_flag == 1 & n_hisp_candidates >= 1
)

# =========================================================================== #
#                   MAPS OF CONTESTS (FIGURES A.1 & A.2)                      #
# =========================================================================== #

# --------------------------------------------------------------------------- #
# STEP 1: Loading in California Places Shapefiles                             #
# --------------------------------------------------------------------------- #

# Step 1.1: Read & Clean California Places
# Keep the place identifier, name, and internal-point coordinates under
# shorter names. The coordinates arrive as text and are converted to numeric
# after the San Francisco override is applied.
geo_places <- read_sf('Shapefiles/tl_2010_06_place10.shp') %>%
  select(
    fips = GEOID10,
    city_caps = NAME10,
    lat = INTPTLAT10,
    long = INTPTLON10
  ) %>%
  ### Recenter San Francisco's Centroid on Map to Make More Sense Visually
  mutate(
    lat = as.numeric(
      if_else(city_caps == "San Francisco", '+37.77938', lat)
    ),
    long = as.numeric(
      if_else(city_caps == "San Francisco", '-122.41843', long)
    )
  )

# Step 1.2: Read & Clean California State Boundaries
# Only the name and internal-point coordinate fields are selected from the
# state boundary shapefile.
geo_state <- select(
  read_sf('Shapefiles/ca-state-boundary/CA_State_TIGER2016.shp'),
  NAME, INTPTLAT, INTPTLON
)

# Steps 1.3-1.5 are chained into a single pipeline below.
map_contest <- dat %>%
  # Step 1.3: Gathering the Unique Contests for Mapping
  filter(match_flag == 1, n_hisp_candidates >= 1) %>%
  distinct(contest, county, fips, geo_name, n_candidates,
           n_hisp_candidates) %>%
  # Step 1.4: Adding Latitudes and Longitudes
  left_join(geo_places, by = "fips") %>%
  # Label = place name plus characters 8-11 of the contest id
  mutate(
    city_label = paste(city_caps, substr(contest, 8, 11), sep = ", ")
  ) %>%
  # Step 1.5: Transforming 'map_contest' into a Spatial Object
  st_as_sf(coords = c('long', 'lat'), crs = 4326)

# --------------------------------------------------------------------------- #
# STEP 2: Plotting the Pooled Contests Map                                    #
# --------------------------------------------------------------------------- #

# Step 2.1: Code to Create Map
# Layers, bottom to top: state outline, contest points, repelled text labels.
map1 <- ggplot() +
  geom_sf(data = geo_state) +
  geom_sf(data = map_contest, color = "forestgreen", size = 1) +
  geom_text_repel(
    data = map_contest,
    mapping = aes(label = city_label, geometry = geometry),
    # Derive label x/y positions from the sf geometry column
    stat = "sf_coordinates",
    color = "black",
    size = 1.75,
    box.padding = 0.3,
    point.padding = 0.2,
    segment.color = "gray50",
    segment.size = 0.3,
    max.overlaps = 25
  ) +
  labs(
    title = "All Mayoral Contests with ≥ 1 Latino Candidate",
    subtitle = "2010-2021",
    caption = "N = 103"
  ) +
  coord_sf() +
  theme_minimal() +
  # Strip axis text, axis titles, and grid lines so only the map remains
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )

# Step 2.2: Display Map in R Viewer
suppressWarnings(print(map1))

# --------------------------------------------------------------------------- #
# STEP 3: Plotting the SoCal Contests Map                                     #
# --------------------------------------------------------------------------- #

# Step 3.1: Reproject to Lat/Lon
# Both layers are put on EPSG:4326 so the degree-based box below applies.
map_contest <- map_contest %>% st_transform(crs = 4326)
geo_state <- geo_state %>% st_transform(crs = 4326)

# Step 3.2: Subset to SoCal Bounding Box (LA, Orange, Riverside, San Bernardino)
bbox_socal <- st_bbox(
  c(xmin = -118.9, ymin = 33.4, xmax = -116.7, ymax = 34.4),
  crs = st_crs(map_contest)
)
la_map <- map_contest %>% st_crop(bbox_socal)

# Step 3.3: Extract Coordinates for Label Positioning
# Store the point coordinates as plain columns for use as label positions.
la_map_coords <- mutate(
  la_map,
  lon = st_coordinates(geometry)[, "X"],
  lat = st_coordinates(geometry)[, "Y"]
)

# Step 3.4: Code to Generate Map
# Zoomed map of the Southern California bounding box: state polygon as the
# backdrop, cropped contest points on top, and repelled city/year labels
# positioned from the lon/lat columns of 'la_map_coords'.
map2 <- ggplot() +
  geom_sf(data = geo_state, fill = "grey95", color = "black") +
  geom_sf(data = la_map, color = "forestgreen", size = 1) +
  geom_text_repel(data = la_map_coords,
                  aes(x = lon, y = lat, label = city_label),
                  size = 1.75, max.overlaps = 20) +
  # Same extent as the bounding box used to crop the contests
  coord_sf(xlim = c(-118.9, -116.7), ylim = c(33.4, 34.4), expand = FALSE) +
  labs(
    # x carries longitude and y carries latitude (labels were swapped before)
    x = "Longitude",
    y = "Latitude",
    title = "Mayoral Contests with at Least One Latino Candidate",
    caption = "Subset of 2010–2021 mayoral contests from Los Angeles, Orange, Riverside, and San Bernardino counties"
  ) +
  theme_minimal()

# Step 3.5: Display Map in R Viewer
suppressWarnings(print(map2))

# =========================================================================== #
#     CANDIDATES IN SAMPLE BY ETHNICITY AND PARTY (FIGURES 1,2 & TABLE 2)     #
# =========================================================================== #

# --------------------------------------------------------------------------- #
# STEP 1: Create an Object that Tracks # of Candidates by Types               #
# --------------------------------------------------------------------------- #

# Step 1.1: Create a Long Object to Begin to Count Candidates
# Reduce the data to one row per distinct combination of contest and
# candidate columns, then reshape so each numbered candidate slot becomes its
# own row with 'cand', 'race_cand', and 'party_cand' columns.
cand_count <- dat %>%
  filter(match_flag == 1, n_hisp_candidates >= 1) %>%
  select(
    contest, geo_name,
    starts_with("cand_"),
    starts_with("race_cand_"),
    starts_with("party_cand_")
  ) %>%
  distinct() %>%
  pivot_longer(
    cols = matches("^(cand_|race_cand_|party_cand_)\\d+$"),
    names_pattern = "(cand|race_cand|party_cand)_(\\d+)",
    names_to = c(".value", "candidate_number")
  ) %>%
  # Drop candidate slots with no candidate recorded
  filter(!is.na(cand)) %>%
  # Characters 8-11 of the contest id are used as the year
  mutate(year = str_sub(contest, 8, 11)) %>%
  arrange(year, geo_name)

# Step 1.2: Create a Dataframe with Counts for the Plot
# One row per candidate category. 'label' is a factor whose levels are the
# reverse of the listing order below.
cand_count_plot <- with(cand_count, {
  type_labels <- c(
    "All Latino Candidates", "Latino Nonpartisan",
    "Latino Known D", "Latino Known R",
    "All Non-Latino Candidates", "Non-Latino Nonpartisan",
    "Non-Latino Known D", "Non-Latino Known R"
  )
  is_latino <- race_cand == 'hispanic'
  # Count rows where the condition is TRUE (NA counts as not matching)
  tally <- function(keep) sum(keep, na.rm = TRUE)

  tibble(
    label = factor(type_labels, levels = rev(type_labels)),
    count = c(
      tally(is_latino),
      tally(is_latino & party_cand == 'Unknown'),
      tally(is_latino & party_cand == 'Known D'),
      tally(is_latino & party_cand == 'Known R'),
      tally(!is_latino),
      tally(!is_latino & party_cand == 'Unknown'),
      tally(!is_latino & party_cand == 'Known D'),
      tally(!is_latino & party_cand == 'Known R')
    )
  )
})

# --------------------------------------------------------------------------- #
# STEP 2: Generate Figure 1                                                   #
# --------------------------------------------------------------------------- #

# Step 2.1: Code to Plot Number of Candidates by Type
# Horizontal bars, one per candidate category, with the count printed just
# past the end of each bar.
count_plot <- cand_count_plot %>%
  ggplot(aes(x = count, y = label, fill = label)) +
  geom_col() +
  geom_text(aes(label = count), size = 3.5, hjust = -0.15) +
  scale_fill_manual(
    values = c(
      "All Latino Candidates" = "darkolivegreen",
      "Latino Nonpartisan" = "forestgreen",
      "Latino Known D" = "darkblue",
      "Latino Known R" = "darkred",
      "All Non-Latino Candidates" = "seagreen",
      "Non-Latino Nonpartisan" = "#66a61e",
      "Non-Latino Known D" = "lightblue",
      "Non-Latino Known R" = "lightcoral"
    )
  ) +
  # No padding on the left; extra room on the right for the count labels
  scale_x_continuous(expand = expansion(mult = c(0, 0.12))) +
  labs(
    title = "Candidates in Sample by Ethnicity & Party",
    x = "Number of Candidates",
    y = ""
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 12, hjust = 0.5)
  )

# Step 2.2: Display Plot in R Viewer 
print(count_plot)

# --------------------------------------------------------------------------- #
# STEP 3: Table for Contest Candidate Combinations (Table 2 & Figure A.3)     #
# --------------------------------------------------------------------------- #

# Step 3.1: Modify 'cand_count' Object
# Per contest: count candidates in each ethnicity-by-party cell, then derive
# 0/1 flags for "all nonpartisan" and "at least one known D / R" on each side.
n_cand_contest <- cand_count %>%
  mutate(latino = race_cand == "hispanic") %>%
  group_by(contest) %>%
  summarise(
    l_np = sum(latino & party_cand == "Unknown"),
    l_kd = sum(latino & party_cand == "Known D"),
    l_kr = sum(latino & party_cand == "Known R"),
    nl_np = sum(!latino & party_cand == "Unknown"),
    nl_kd = sum(!latino & party_cand == "Known D"),
    nl_kr = sum(!latino & party_cand == "Known R")
  ) %>%
  mutate(
    l_all_np = as.numeric(l_np >= 1 & l_kd == 0 & l_kr == 0),
    l_atone_kd = as.numeric(l_kd >= 1),
    l_atone_kr = as.numeric(l_kr >= 1),
    nl_all_np = as.numeric(nl_np >= 1 & nl_kd == 0 & nl_kr == 0),
    nl_atone_kd = as.numeric(nl_kd >= 1),
    nl_atone_kr = as.numeric(nl_kr >= 1)
  )

# Step 3.2: Create Object to Store Table Information
# Rows are the Latino-side flags and columns the non-Latino-side flags; each
# cell is the number of contests where both flags equal 1.
latino_flags <- c("l_all_np", "l_atone_kd", "l_atone_kr")
count_contests <- function(nl_flag) {
  vapply(
    latino_flags,
    function(l_flag) {
      sum(
        n_cand_contest[[l_flag]] == 1 & n_cand_contest[[nl_flag]] == 1,
        na.rm = TRUE
      )
    },
    integer(1),
    USE.NAMES = FALSE
  )
}

tab_cand_contest <- tibble(
  ` ` = c("All NP", "At Least One Known D", "At Least One Known R"),
  `All NP` = count_contests("nl_all_np"),
  `At Least One Known D` = count_contests("nl_atone_kd"),
  `At Least One Known R` = count_contests("nl_atone_kr")
)

# Step 3.3: Beautify Table using Kable Extra
k <- pack_rows(
  add_header_above(
    kable(tab_cand_contest),
    c(" " = 1, "Non-Latino Candidates" = 3)
  ),
  "Latino Candidates", 1, 3
)

# Step 3.4: Present Table
k

# --------------------------------------------------------------------------- #
# STEP 3.5: Upset Plot (Figure A.3)                                           #
# --------------------------------------------------------------------------- #

# Step 3.5.1: Add a Flag for all the Nonpartisan Races
# all_np is 1 when none of the four known-party indicators is set.
dat_up <- dat %>%
  mutate(
    all_np = as.numeric(
      knownD_hisp == 0 & knownR_hisp == 0 &
        knownD_nonhisp == 0 & knownR_nonhisp == 0
    )
  ) %>%
  distinct(contest, geo_name, year, knownD_hisp, knownR_hisp,
           knownD_nonhisp, knownR_nonhisp, all_np, match_flag)

# Step 3.5.2: Create an Object Just with the Info Needed for the Plot
# A plain data.frame of integer 0/1 set-membership columns with display names.
dat_up <- dat_up %>%
  transmute(
    `Latino Known D` = as.integer(knownD_hisp),
    `Latino Known R` = as.integer(knownR_hisp),
    `Non-Latino Known D` = as.integer(knownD_nonhisp),
    `Non-Latino Known R` = as.integer(knownR_nonhisp),
    `All Cands NP` = as.integer(all_np)
  ) %>%
  as.data.frame()

# Step 3.5.3: Now make the UpSet Plot, Including the Indicator for all_np
upset(
  dat_up,
  sets = c(
    "Latino Known D", "Latino Known R",
    "Non-Latino Known D", "Non-Latino Known R",
    "All Cands NP"
  ),
  order.by = "freq",
  sets.bar.color = "#1f77b4"
)

# --------------------------------------------------------------------------- #
# STEP 4: Generate Figure 2                                                   #
# --------------------------------------------------------------------------- #

# Step 4.1: Create Object that Tracks Candidates and Contests Over Time
# One row per distinct contest record, restricted to contests with a Latino
# candidate.
time_hisp <- dat %>%
  filter(match_flag == 1, n_hisp_candidates != 0) %>%
  distinct(year, month, contest, geo_name, n_candidates, n_hisp_candidates,
           knownD_hisp, knownD_nonhisp, knownR_hisp, knownR_nonhisp) %>%
  arrange(n_candidates, desc(geo_name))

# Yearly totals in long form: number of distinct contests plus the sum of
# each known-party indicator.
time_hisp_plot <- time_hisp %>%
  group_by(year) %>%
  summarize(
    contests = n_distinct(contest),
    across(
      c(knownD_hisp, knownR_hisp, knownD_nonhisp, knownR_nonhisp),
      sum
    )
  ) %>%
  pivot_longer(
    cols = -year,
    names_to = "variable",
    values_to = "value"
  )

# Step 4.2: All Contests with ≥1 Latino Candidate Plot
# Top panel: yearly number of contests.
col1 <- time_hisp_plot %>%
  filter(variable == "contests") %>%
  ggplot(aes(x = year, y = value, fill = variable)) +
  geom_col(fill = "forestgreen") + # Custom fill for "contests"
  scale_x_continuous(
    limits = c(2009, 2022),
    breaks = seq(2010, 2021, by = 1)
  ) +
  labs(
    title = "All Mayoral Contests With Latino Candidate(s) Over Time",
    x = "",
    y = "# of Contests"
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 12, hjust = 0.5)
  )

# Steps 4.3-4.6 share one layout, so a small helper builds each panel.
# series:         value of 'variable' in 'time_hisp_plot' to plot
# bar_fill:       fixed bar colour
# panel_subtitle: subtitle shown above the panel
known_panel <- function(series, bar_fill, panel_subtitle) {
  time_hisp_plot %>%
    filter(variable == .env$series) %>%
    ggplot(aes(x = year, y = value, fill = variable)) +
    geom_col(fill = bar_fill) +
    scale_x_continuous(
      limits = c(2009, 2022),
      breaks = seq(2010, 2021, by = 2)
    ) +
    # Common 0-10 y-range so the four panels are directly comparable
    scale_y_continuous(limits = c(0, 10), breaks = seq(0, 10, by = 2)) +
    labs(x = "", y = "# of Contests", subtitle = panel_subtitle) +
    theme_classic() +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 45, hjust = 1, size = 5),
      plot.subtitle = element_text(size = 6, hjust = 0.5)
    )
}

# Step 4.3: All Contests with Known Latino D
col2 <- known_panel("knownD_hisp", "darkblue", "Known Latino D Cand.")

# Step 4.4: All Contests with Known Latino R
col3 <- known_panel("knownR_hisp", "darkred", "Known Latino R Cand.")

# Step 4.5: All Contests with Known Non-Latino D
col4 <- known_panel("knownD_nonhisp", "lightblue", "Known Non-Latino D Cand.")

# Step 4.6: All Contests with Known Non-Latino R
col5 <- known_panel("knownR_nonhisp", "lightcoral", "Known Non-Latino R Cand.")

# Step 4.7: Combine Plots Using Patchwork
# Full-width panel on top, the four small panels side by side underneath.
bottom_row <- (col2 | col3 | col4 | col5)
combined_col <- col1 + bottom_row + plot_layout(heights = c(2, 1))

# Step 4.8: Display Plot in R Viewer
print(combined_col)

# =========================================================================== #
#   DISTRIBUTION & PARTISANSHIP OF LATINO VOTERS ACROSS PRECINCTS (FIGURE 3)  #
# =========================================================================== #

# --------------------------------------------------------------------------- #
# STEP 1: Generate Sub-plots                                                  #
# --------------------------------------------------------------------------- #

# Step 1.1: All Latino Registered
# Histogram of the precinct-level Latino share of registered voters, with a
# dashed vertical line at the sample mean.
# NOTE(review): this panel keeps precincts with total_reg >= 5 while the two
# partisan panels below use total_reg >= 10 — confirm this is intended.
histo1 <- ggplot(data = dat %>% filter(n_hisp_candidates >= 1 &
                                         match_flag == 1 &
                                         total_reg >= 5), aes(p_hisp_reg)) +
  geom_histogram(fill = "forestgreen") +
  geom_vline(aes(xintercept = mean(p_hisp_reg, na.rm = TRUE)),
             color = "blue", linetype = "dashed", size = 1) +
  theme_classic() +
  scale_x_continuous(limits = c(-0.03, 1), breaks = seq(0, 1, by = 0.2)) +
  labs(x = "Share of Registered Voters Latino in Precinct",
       y = "# of Precincts",
       title = "All Latino Registered Voters") +
  theme(plot.title = element_text(size = 14, hjust = 0.5))

# Step 1.2: Latino Democrats
# The mean ignores missing shares (as in Step 1.1) so that a single NA cannot
# silently remove the reference line.
histo4 <- ggplot(data = dat %>% filter(n_hisp_candidates >= 1 &
                                         match_flag == 1 &
                                         total_reg >= 10), aes(p_hisp_dem_reg)) +
  geom_histogram(fill = "lightblue") +
  geom_vline(aes(xintercept = mean(p_hisp_dem_reg, na.rm = TRUE)),
             color = "red", linetype = "dashed", size = 1) +
  theme_classic() +
  scale_x_continuous(limits = c(-0.03, 1), breaks = seq(0, 1, by = 0.25)) +
  labs(x = "Share of Latino Democrats in Precinct",
       y = "# of Precincts",
       title = "Latino Democrats") +
  theme(plot.title = element_text(size = 10, hjust = 0.5),
        axis.title.x = element_text(size = 8))

# Step 1.3: Latino Republicans
# Same layout as Step 1.2, with the mean again computed on non-missing values.
histo5 <- ggplot(data = dat %>% filter(n_hisp_candidates >= 1 &
                                         match_flag == 1 &
                                         total_reg >= 10), aes(p_hisp_rep_reg)) +
  geom_histogram(fill = "lightcoral") +
  geom_vline(aes(xintercept = mean(p_hisp_rep_reg, na.rm = TRUE)),
             color = "blue", linetype = "dashed", size = 1) +
  theme_classic() +
  scale_x_continuous(limits = c(-0.03, 1), breaks = seq(0, 1, by = 0.25)) +
  labs(x = "Share of Latino Republicans in Precinct",
       y = "# of Precincts",
       title = "Latino Republicans") +
  theme(plot.title = element_text(size = 10, hjust = 0.5),
        axis.title.x = element_text(size = 8))

# --------------------------------------------------------------------------- #
# STEP 2: Combine Sub-plots and Print Plot                                    #
# --------------------------------------------------------------------------- #

# Step 2.1: Combine the Plots Together
# Overall histogram on top, the two partisan histograms side by side below.
combined_histo <- histo1 +
  (histo4 | histo5) +
  plot_layout(heights = c(2, 1))

# Step 2.2: Display Plot in R Viewer
suppressWarnings(print(combined_histo))

# =========================================================================== #
#     A GENERAL PATTERN OF ETHNIC VOTING ACROSS ALL CONTESTS (FIGURE 4)       #
# =========================================================================== #

# --------------------------------------------------------------------------- #
# STEP 1: Generate Scatterplot                                                #
# --------------------------------------------------------------------------- #

# Step 1.1: Code to Build Plot
# Precinct-level scatter of Latino candidate vote share against Latino
# registration share, with a dashed 45-degree reference line and a linear fit.
scat <- dat %>%
  filter(n_hisp_candidates >= 1, match_flag == 1, total_reg >= 5) %>%
  ggplot(aes(x = p_hisp_reg, y = p_hisp_votes)) +
  geom_point(alpha = 0.07, color = "forestgreen") +
  geom_abline(
    intercept = 0, slope = 1,
    color = "orange", linetype = "dashed", size = 1.5
  ) +
  geom_smooth(method = "lm", color = "darkblue") +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2)) +
  labs(
    title = "The Ethnic Voting Relationship in 103 Multiethnic Mayoral Contests",
    x = "Share of Registered Voters Latino in Precinct",
    y = "Latino Candidate(s) Vote Share"
  ) +
  theme_classic() +
  theme(plot.title = element_text(size = 12, hjust = 0.5))

# Step 1.2: Display Plot in R Viewer
suppressWarnings(print(scat))

# --------------------------------------------------------------------------- #
# STEP 2: Estimate Linear Relationship                                        #
# --------------------------------------------------------------------------- #

# Bivariate OLS of vote share on registration share (precincts with
# total_reg >= 5); the summary is printed to the console.
lm(p_hisp_votes ~ p_hisp_reg, data = dat %>% filter(total_reg >= 5)) %>%
  summary()

# =========================================================================== #
#               MAIN REGRESSION MODELS (TABLE 3 & FIGURE 5)                   #
# =========================================================================== #

# --------------------------------------------------------------------------- #
# STEP 1: Estimate Model for All Contests (Pooled Model)                      #
# --------------------------------------------------------------------------- #

# Step 1.1: Define Formula & Estimation
# Vote share on Latino registration share, interacted with each of the four
# known-party indicators, with year-by-city fixed effects.
pooled_fml <- p_hisp_votes ~ p_hisp_reg +
  p_hisp_reg*knownD_hisp +
  p_hisp_reg*knownR_hisp +
  p_hisp_reg*knownD_nonhisp +
  p_hisp_reg*knownR_nonhisp | year^geo_name

hisp_pool_lm <- feols(pooled_fml, data = filter(dat, total_reg >= 5))

# Step 1.2: Present Results in R
summary(hisp_pool_lm)

# --------------------------------------------------------------------------- #
# STEP 2: Estimate Model for Contests w/ Viable Latino Candidate(s)           #
# --------------------------------------------------------------------------- #

# Step 2.1: Create Viable Latino Partisan Indicators
# For each row, the party_cand_* columns are compared with a party label and
# multiplied element-wise by the lone_viable_latino_* columns; the products
# are summed across the row (ignoring NAs) and the flag is set to 1 when that
# sum is positive, 0 otherwise.
# NOTE(review): the two across() blocks are multiplied as data frames, which
# presumably pairs columns by position — this assumes the party_cand_* and
# lone_viable_latino_* columns are equal in number and in the same candidate
# order. TODO confirm against the data.
dat <- dat %>%
  mutate(
    # 1 if some candidate slot is "Known D" and has a positive
    # lone_viable_latino_ value
    via_lat_knownD = as.integer(rowSums(
      (across(starts_with("party_cand_"), ~ . == "Known D")) *
        (across(starts_with("lone_viable_latino_")))
      , na.rm = TRUE) > 0),
    # Same construction for "Known R"
    via_lat_knownR = as.integer(rowSums(
      (across(starts_with("party_cand_"), ~ . == "Known R")) *
        (across(starts_with("lone_viable_latino_")))
      , na.rm = TRUE) > 0)
  )

# Step 2.2: Define Formula & Estimation
# Same structure as the pooled model, but with 'via_lat_vs' as the outcome
# and the viable-Latino party flags in place of the Latino party indicators.
# Rows with a missing outcome are excluded.
viable_fml <- via_lat_vs ~ p_hisp_reg +
  p_hisp_reg*via_lat_knownD +
  p_hisp_reg*via_lat_knownR +
  p_hisp_reg*knownD_nonhisp +
  p_hisp_reg*knownR_nonhisp | year^geo_name

via_lat_lm <- feols(
  viable_fml,
  data = filter(dat, total_reg >= 5 & !is.na(via_lat_vs))
)

# Step 2.3: Present Results in R
summary(via_lat_lm)

# --------------------------------------------------------------------------- #
# STEP 3: Estimate Model for Two-Candidate Contests                           #
# --------------------------------------------------------------------------- #

# Step 3.1: Define Formula & Estimation
# Pooled specification re-estimated on contests with exactly two candidates.
two_cand_fml <- p_hisp_votes ~ p_hisp_reg +
  p_hisp_reg*knownD_hisp +
  p_hisp_reg*knownR_hisp +
  p_hisp_reg*knownD_nonhisp +
  p_hisp_reg*knownR_nonhisp | year^geo_name

two_cand_lm <- feols(
  two_cand_fml,
  data = filter(dat, total_reg >= 5 & n_candidates == 2)
)

# Step 3.2: Present Results in R
summary(two_cand_lm)

# --------------------------------------------------------------------------- #
# STEP 5: Generate Interaction Plot                                           #
# --------------------------------------------------------------------------- #

# Step 5.1: Generate Test Data
# Five stacked 11-point grids of p_hisp_reg (0 to 1 in steps of 0.1). Rows
# 1-11 have every indicator at 0; each later set of 11 rows switches on
# exactly one of the four known-party indicators. 'year' and 'geo_name'
# identify the fixed effect extracted in Step 5.4.
# tibble() replaces the deprecated data_frame() and matches the rest of the
# script.
test_data <- tibble(
  p_hisp_reg = rep(seq(0,1, by = 0.1), 5),
  knownD_hisp = c(rep(0,11),rep(1,11),rep(0,33)),
  knownR_hisp = c(rep(0,22),rep(1,11),rep(0,22)),
  knownD_nonhisp = c(rep(0,33),rep(1,11),rep(0,11)),
  knownR_nonhisp = c(rep(0,44),rep(1,11)),
  year = 2010,
  geo_name = "tracy")

# Step 5.2: Define Coefficients
# Named list of point estimates from the pooled model
coefs <- as.list(coefficients(hisp_pool_lm))

# Step 5.3: Define Standard Errors
# Named list of the corresponding standard errors
ses <- as.list(se(hisp_pool_lm))

# Step 5.4: Grab Fixed Effect
# Estimated year-by-city fixed effect stored under the name "2010_tracy"
fe <- fixef(hisp_pool_lm)$`year^geo_name`["2010_tracy"]

# Step 5.5: Generating Predicted Values
# Prediction = fixed effect + slope terms evaluated on each 'test_data' row.
# The variance of that linear combination is x' V x, where V is the model's
# coefficient covariance matrix. Summing squared standard errors alone (the
# previous approach) ignores the covariances between the baseline slope and
# the interaction slopes and misstates the width of the interval.
# The fixed effect is treated as a known constant: it shifts the prediction
# but does not enter the variance.
slope_terms <- c(
  "p_hisp_reg",
  "p_hisp_reg:knownD_hisp",
  "p_hisp_reg:knownR_hisp",
  "p_hisp_reg:knownD_nonhisp",
  "p_hisp_reg:knownR_nonhisp"
)
slope_coefs <- coefficients(hisp_pool_lm)[slope_terms]
slope_vcov <- vcov(hisp_pool_lm)[slope_terms, slope_terms]

# Design matrix whose columns line up one-to-one with 'slope_terms'
slope_x <- with(
  test_data,
  cbind(
    p_hisp_reg,
    p_hisp_reg * knownD_hisp,
    p_hisp_reg * knownR_hisp,
    p_hisp_reg * knownD_nonhisp,
    p_hisp_reg * knownR_nonhisp
  )
)

test_data <- test_data %>%
  mutate(
    pred = fe + as.numeric(slope_x %*% slope_coefs),
    # Row-wise quadratic form x' V x; floored at 0 to guard against tiny
    # negative values from floating-point rounding before the square root
    pred_var = pmax(rowSums((slope_x %*% slope_vcov) * slope_x), 0),
    lower = pred - 1.96 * sqrt(pred_var),
    upper = pred + 1.96 * sqrt(pred_var)
  )

# Step 5.6: Prepare Object for Plotting
# Attach a display label naming which indicator is switched on in each row;
# rows with none switched on fall through to the nonpartisan label.
test_data$`Partisan Status:` <- with(
  test_data,
  case_when(
    knownD_hisp == 1 ~ "Latino Known D",
    knownR_hisp == 1 ~ "Latino Known R",
    knownD_nonhisp == 1 ~ "Non-Latino Known D",
    knownR_nonhisp == 1 ~ "Non-Latino Known R",
    .default = "All Candidates Nonpartisan"
  )
)

# Step 5.7: Code to Generate Plot
# Predicted lines with shaded bands for the nonpartisan baseline and the two
# non-Latino known-party scenarios; the two Latino known-party scenarios are
# dropped before plotting.
status_palette <- c(
  "All Candidates Nonpartisan" = "darkgreen",
  "Non-Latino Known D" = "lightblue",
  "Non-Latino Known R" = "lightcoral"
)

combined_inter <- test_data %>%
  filter(
    `Partisan Status:` != "Latino Known D" &
      `Partisan Status:` != "Latino Known R"
  ) %>%
  ggplot(aes(
    x = p_hisp_reg, y = pred,
    color = `Partisan Status:`,
    linetype = `Partisan Status:`
  )) +
  geom_line(size = 1) +
  # The ribbon supplies its own aesthetics rather than inheriting the plot's
  geom_ribbon(
    aes(x = p_hisp_reg, ymin = lower, ymax = upper,
        fill = `Partisan Status:`),
    alpha = 0.2, inherit.aes = FALSE
  ) +
  scale_linetype_manual(values = c(
    "All Candidates Nonpartisan" = "solid",
    "Non-Latino Known D" = "dashed",
    "Non-Latino Known R" = "twodash"
  )) +
  scale_color_manual(values = status_palette) +
  scale_fill_manual(values = status_palette) +
  scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2)) +
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.2)) +
  labs(
    title = "Latino Candidate Support by Precinct Composition and Opponent Partisanship",
    caption = "Predicted values from Column 1 model of Table 1, using Tracy, CA 2010 FE",
    x = "Share of Registered Voters Latino in Precinct",
    y = "Predicted Latino Candidate(s) Vote Share"
  ) +
  theme_classic() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(size = 10, hjust = 0.5),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    axis.title.y = element_text(size = 8.5)
  )

# Step 5.8: Display Plot in R Viewer
suppressWarnings(print(combined_inter))

# =========================================================================== #
#              SUBSETTED REGRESSION TO HIGHEST LATINO CONCENTRATIONS          #
#                           (Appendix Table A.3)                              #
# =========================================================================== #

# The three main specifications re-estimated on precincts where Latino
# registrants are at least half of registered voters (p_hisp_reg >= 0.5).

# Specification shared by the pooled and two-candidate models
high_latino_fml <- p_hisp_votes ~ p_hisp_reg +
  p_hisp_reg*knownD_hisp +
  p_hisp_reg*knownR_hisp +
  p_hisp_reg*knownD_nonhisp +
  p_hisp_reg*knownR_nonhisp | year^geo_name

# Specification for the viable-Latino outcome
high_latino_viable_fml <- via_lat_vs ~ p_hisp_reg +
  p_hisp_reg*via_lat_knownD +
  p_hisp_reg*via_lat_knownR +
  p_hisp_reg*knownD_nonhisp +
  p_hisp_reg*knownR_nonhisp | year^geo_name

# Pooled model
high_latino_pooled_lm <- feols(
  high_latino_fml,
  data = filter(dat, total_reg >= 5, p_hisp_reg >= 0.5)
)
summary(high_latino_pooled_lm)

# Viable Latino candidate model (rows with a missing outcome excluded)
high_latino_viable_lm <- feols(
  high_latino_viable_fml,
  data = filter(
    dat,
    total_reg >= 5 & !is.na(via_lat_vs) & p_hisp_reg >= 0.5
  )
)
summary(high_latino_viable_lm)

# Two-candidate contests only
high_latino_two_cand_lm <- feols(
  high_latino_fml,
  data = filter(
    dat,
    total_reg >= 5 & n_candidates == 2 & p_hisp_reg >= 0.5
  )
)
summary(high_latino_two_cand_lm)

# =========================================================================== #
#                 CANDIDATE VALENCE ANALYSIS (Table 4)                        #
# =========================================================================== #

# --------------------------------------------------------------------------- #
# STEP 1: Generate Prior Partisan Candidacy Indicators                        #
# --------------------------------------------------------------------------- #

# Steps 1.1-1.4: Prior Office Indicators
# Each indicator is 1 for the hand-listed city / year / month contests and 0
# for every other row. All four are created in a single mutate() call.
dat <- dat %>%
  mutate(
    # Step 1.1: KnownD_hisp Prior Office Indicator
    knownD_hisp_pc = case_when(
      geo_name %in% c("san diego", "santa ana", "oceanside",
                      "west sacramento") &
        year == 2020 & month == 11 ~ 1,
      TRUE ~ 0
    ),
    # Step 1.2: KnownR_hisp Prior Office Indicator
    knownR_hisp_pc = case_when(
      (geo_name == "anaheim" & year == 2018 & month == 11) |
        (geo_name == "oceanside" & year == 2020 & month == 11) ~ 1,
      TRUE ~ 0
    ),
    # Step 1.3: KnownD_nonhisp Prior Office Indicator
    knownD_nonhisp_pc = case_when(
      (geo_name == "san francisco" & year == 2011 & month == 11) |
        (geo_name == "sacramento" & year == 2016 & month == 6) |
        (geo_name == "fresno" & year == 2020 & month == 3) |
        (geo_name == "palmdale" & year == 2020 & month == 11) ~ 1,
      TRUE ~ 0
    ),
    # Step 1.4: KnownR_nonhisp Prior Office Indicator
    knownR_nonhisp_pc = case_when(
      (geo_name == "long beach" & year == 2014 & month == 6) |
        (geo_name %in% c("anaheim", "fontana") &
           year == 2014 & month == 11) |
        (geo_name == "palmdale" & year == 2016 & month == 11) |
        (geo_name == "fontana" & year == 2018 & month == 11) |
        (geo_name == "napa" & year == 2020 & month == 11) ~ 1,
      TRUE ~ 0
    )
  )

# --------------------------------------------------------------------------- #
# STEP 2: Generate Incumbency Indicators                                      #
# --------------------------------------------------------------------------- #

# Step 2.1: Define Candidate Numbers
cand_nums <- 1:23

# Step 2.2: Helper that flags contests with a matching incumbent
# Returns, for every row of `data`, 1 if at least one candidate slot in
# `cand_nums` has inc_cand_<i> == "Yes", party_cand_<i> == `party`, and a
# race_cand_<i> that is "hispanic" (hispanic = TRUE) or anything other than
# "hispanic" (hispanic = FALSE); otherwise 0.
# A slot whose comparison is NA (e.g. an empty candidate slot) never counts
# as a match, which reproduces the earlier "any(), then recode NA to 0" logic.
# The work is done column-wise, so no rowwise() pass or get() lookup is needed.
flag_known_incumbent <- function(data, party, hispanic) {
  needed <- c(
    paste0("race_cand_", cand_nums),
    paste0("inc_cand_", cand_nums),
    paste0("party_cand_", cand_nums)
  )
  absent <- setdiff(needed, names(data))
  if (length(absent) > 0) {
    stop("Missing candidate columns: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  slot_hits <- lapply(cand_nums, function(i) {
    race <- data[[paste0("race_cand_", i)]]
    race_ok <- if (hispanic) race == "hispanic" else race != "hispanic"
    hit <- race_ok &
      data[[paste0("inc_cand_", i)]] == "Yes" &
      data[[paste0("party_cand_", i)]] == party
    # NA means "cannot confirm a match" and is treated as no match
    !is.na(hit) & hit
  })

  # Stored as double (0/1), the same type the previous NA clean-up produced
  as.numeric(Reduce(`|`, slot_hits))
}

# Steps 2.3-2.6: Build the four incumbency indicators (NA-free by construction)
dat <- dat %>%
  mutate(
    knownD_hisp_inc = flag_known_incumbent(dat, "Known D", hispanic = TRUE),
    knownR_hisp_inc = flag_known_incumbent(dat, "Known R", hispanic = TRUE),
    knownD_nonhisp_inc = flag_known_incumbent(dat, "Known D", hispanic = FALSE),
    knownR_nonhisp_inc = flag_known_incumbent(dat, "Known R", hispanic = FALSE)
  )

# --------------------------------------------------------------------------- #
# STEP 3: Run the Fully Interacted Regression                                 #
# --------------------------------------------------------------------------- #

# Step 3.1: Define Formula
# The four partisan-cue indicators plus their prior-candidacy (_pc) and
# incumbency (_inc) counterparts, each interacted with p_hisp_reg, with
# year-by-city fixed effects. Rows with total_reg below 5 are excluded.
valence_lm <- feols(
  p_hisp_votes ~ p_hisp_reg +
    p_hisp_reg * knownD_hisp +
    p_hisp_reg * knownR_hisp +
    p_hisp_reg * knownD_nonhisp +
    p_hisp_reg * knownR_nonhisp +
    p_hisp_reg * knownD_hisp_pc +
    p_hisp_reg * knownR_hisp_pc +
    p_hisp_reg * knownD_nonhisp_pc +
    p_hisp_reg * knownR_nonhisp_pc +
    p_hisp_reg * knownD_hisp_inc +
    p_hisp_reg * knownR_hisp_inc +
    p_hisp_reg * knownD_nonhisp_inc +
    p_hisp_reg * knownR_nonhisp_inc |
    year^geo_name,
  data = filter(dat, total_reg >= 5)
)

# Step 3.2: Present Model's Results
summary(valence_lm)


# =========================================================================== #
#                 PARTISAN LEAN ANALYSIS (Table 5)                            #
# =========================================================================== #

# --------------------------------------------------------------------------- #
# STEP 1: Create the Net Partisanship Variables                               #
# --------------------------------------------------------------------------- #

# Net partisanship is the Democratic minus the Republican registration share,
# computed separately from the Hispanic (p_hisp_*) and non-Hispanic
# (p_n_hisp_*) columns.
dat <- mutate(
  dat,
  net_part_hisp = p_hisp_dem_reg - p_hisp_rep_reg,
  net_part_nonhisp = p_n_hisp_dem_reg - p_n_hisp_rep_reg
)

# --------------------------------------------------------------------------- #
# STEP 2: Run Model                                                           #
# --------------------------------------------------------------------------- #

# Step 2.1: Specify Formula
# net_part_hisp is interacted with each partisan-cue indicator;
# net_part_nonhisp enters additively. Fixed effects are year-by-city and rows
# with total_reg below 5 are excluded.
net_model <- feols(
  p_hisp_votes ~ net_part_hisp +
    net_part_hisp * knownD_hisp +
    net_part_hisp * knownR_hisp +
    net_part_hisp * knownD_nonhisp +
    net_part_hisp * knownR_nonhisp +
    net_part_nonhisp |
    year^geo_name,
  data = filter(dat, total_reg >= 5)
)

# Step 2.2: Display Results
summary(net_model)

# =========================================================================== #
#                 GROUP CONFLICT ANALYSIS (Table 6)                           #
# =========================================================================== #

# --------------------------------------------------------------------------- #
# STEP 1: Dissimilarity Interaction Model                                     #
# --------------------------------------------------------------------------- #

# Three-way interaction of p_hisp_reg, the four partisan-cue indicators, and
# diss_index, with city-by-year fixed effects. Rows with total_reg below 5
# are excluded.
diss_test <- feols(
  p_hisp_votes ~
    p_hisp_reg * (knownD_hisp + knownR_hisp +
                    knownD_nonhisp + knownR_nonhisp) *
    diss_index | geo_name^year,
  data = dat %>% filter(total_reg >= 5)
)
summary(diss_test)


# Display labels for the LaTeX table. modelsummary() keeps only the
# coefficients named here, in this order.
diss_map <- c(
  "p_hisp_reg" = "Share of Precinct Registration Latino",
  "p_hisp_reg:diss_index" = "Share Latino X Dissimilarity Index",
  "p_hisp_reg:knownD_hisp" = "Share Latino X Latino Known D",
  "p_hisp_reg:knownD_hisp:diss_index" = "Share Latino X Latino Known D X Dissimilarity",
  "p_hisp_reg:knownR_hisp" = "Share Latino X Latino Known R",
  "p_hisp_reg:knownR_hisp:diss_index" = "Share Latino X Latino Known R X Dissimilarity",
  "p_hisp_reg:knownD_nonhisp" = "Share Latino X Non-Latino Known D",
  "p_hisp_reg:knownD_nonhisp:diss_index" = "Share Latino X Non-Latino Known D X Dissimilarity",
  "p_hisp_reg:knownR_nonhisp" = "Share Latino X Non-Latino Known R",
  "p_hisp_reg:knownR_nonhisp:diss_index" = "Share Latino X Non-Latino Known R X Dissimilarity")

# `TRUE` rather than `T`: `T` is an ordinary variable that can be reassigned.
modelsummary(diss_test,
             coef_map = diss_map,
             stars = TRUE,
             gof_omit = 'AIC|BIC|RMSE',
             output = 'latex')

# --------------------------------------------------------------------------- #
# STEP 2: Socio-economic Gaps Model                                           #
# --------------------------------------------------------------------------- #

# Steps 2.1-2.2: Standardise the gaps and average them into a composite
# One row per distinct combination of the selected city-year columns. Each gap
# is z-scored across those rows; poverty_gap and assistance_gap are negated
# before scaling (presumably so that all five components point the same way --
# confirm against the paper). `conflict` is the unweighted mean of the five
# standardised gaps.
citylevel <- dat %>%
  select(
    geo_name, year, diss_index,
    income_gap, poverty_gap, education_gap,
    homeowner_gap, assistance_gap
  ) %>%
  distinct() %>%
  mutate(
    i_gap_std = as.numeric(scale(income_gap)),      # income
    p_gap_std = as.numeric(scale(-poverty_gap)),    # poverty (sign flipped)
    e_gap_std = as.numeric(scale(education_gap)),   # education
    h_gap_std = as.numeric(scale(homeowner_gap)),   # homeownership
    a_gap_std = as.numeric(scale(-assistance_gap)), # assistance (sign flipped)
    conflict = (i_gap_std + p_gap_std + e_gap_std + h_gap_std + a_gap_std) / 5
  )

# Step 2.3: Join Composite Score With Precinct Data
dat <- left_join(
  dat,
  select(citylevel, geo_name, year, conflict),
  by = c("geo_name", "year")
)


# Step 2.4: Estimate Fixed-effects Models Interacting this Variable
conf_test <- feols(p_hisp_votes ~ 
                     p_hisp_reg * (knownD_hisp + knownR_hisp +
                                     knownD_nonhisp + knownR_nonhisp) *
                     conflict | geo_name^year,
                   data = dat %>% filter(total_reg >=5))
summary(conf_test)

# --------------------------------------------------------------------------- #
# STEP 3: Plotting These Results                                              #
# --------------------------------------------------------------------------- #

# Step 3.1: Creating Prediction Data
# Prediction grid of 165 rows = 3 moderator levels x 5 cue scenarios x 11
# values of p_hisp_reg (0 to 1 in steps of 0.1).
#   * Cue scenarios, in order: no cue, then each of knownD_hisp, knownR_hisp,
#     knownD_nonhisp, knownR_nonhisp switched on alone.
#   * Moderator levels, in order: mean - 2 SD, mean, mean + 2 SD of the
#     city-level diss_index and conflict columns.
# year / geo_name are constants identifying the fixed effect used later.
# tibble() replaces the deprecated data_frame() constructor.
test_data <- local({
  n_share <- 11     # length of seq(0, 1, by = 0.1)
  n_scenarios <- 5  # no cue + four single-cue scenarios
  n_levels <- 3     # low / average / high moderator

  # 0/1 column that is 1 only in the scenario at `position` (1 = no cue)
  cue_alone <- function(position) {
    rep(
      rep(as.numeric(seq_len(n_scenarios) == position), each = n_share),
      times = n_levels
    )
  }

  # mean - 2 SD, mean, mean + 2 SD, each repeated over a full scenario block
  three_levels <- function(x) {
    centre <- mean(x)
    spread <- sd(x)
    rep(
      c(centre - 2 * spread, centre, centre + 2 * spread),
      each = n_share * n_scenarios
    )
  }

  tibble(
    p_hisp_reg = rep(seq(0, 1, by = 0.1), n_scenarios * n_levels),
    knownD_hisp = cue_alone(2),
    knownR_hisp = cue_alone(3),
    knownD_nonhisp = cue_alone(4),
    knownR_nonhisp = cue_alone(5),
    year = 2010,
    geo_name = "tracy",
    diss_index = three_levels(citylevel$diss_index),
    conflict = three_levels(citylevel$conflict)
  )
})

# Step 3.2: Extract Coefficients and Standard Errors from Models
# For each model: named lists of point estimates and standard errors, plus
# the estimated city-by-year fixed effect labelled "tracy_2010".

## Step 3.2.1: Dissimilarity
diss_coefs <- as.list(coef(diss_test))
diss_ses <- as.list(se(diss_test))
diss_fe <- fixef(diss_test)[["geo_name^year"]]["tracy_2010"]

## Step 3.2.2: Conflict
conf_coefs <- as.list(coef(conf_test))
conf_ses <- as.list(se(conf_test))
conf_fe <- fixef(conf_test)[["geo_name^year"]]["tracy_2010"]

# Step 3.3: Generate Predicted Values

# Helper: linear prediction and its variance over the prediction grid.
#   model     - fitted feols object (diss_test or conf_test)
#   fe        - the single fixed-effect value added to every prediction
#   grid      - data frame holding p_hisp_reg, the four cue indicators, and
#               the moderator column
#   moderator - name of the moderator column ("diss_index" or "conflict")
# Returns a list with `pred` (fe + x'b) and `var` (x'Vx), one value per row.
# Only the p_hisp_reg terms are used: p_hisp_reg, p_hisp_reg:cue,
# p_hisp_reg:moderator and p_hisp_reg:cue:moderator.
# The variance uses the full coefficient covariance matrix. Summing squared
# (SE * x) terms, as was done before, ignores the covariances between the
# interaction coefficients and misstates the interval width. The fixed effect
# is treated as a constant in the variance, as before.
predict_cue_grid <- function(model, fe, grid, moderator) {
  cues <- c("knownD_hisp", "knownR_hisp", "knownD_nonhisp", "knownR_nonhisp")

  share <- grid[["p_hisp_reg"]]
  cue_mat <- as.matrix(grid[cues])
  mod <- grid[[moderator]]

  # Design matrix whose column names match the model's coefficient names
  x <- cbind(share, share * cue_mat, share * mod, share * cue_mat * mod)
  colnames(x) <- c(
    "p_hisp_reg",
    paste0("p_hisp_reg:", cues),
    paste0("p_hisp_reg:", moderator),
    paste0("p_hisp_reg:", cues, ":", moderator)
  )

  beta <- coef(model)
  absent <- setdiff(colnames(x), names(beta))
  if (length(absent) > 0) {
    stop("Coefficients not found in model: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }
  v <- vcov(model)[colnames(x), colnames(x), drop = FALSE]

  list(
    pred = unname(fe) + as.vector(x %*% beta[colnames(x)]),
    # Row-wise quadratic form x_i' V x_i; clamp tiny negative rounding error
    var = pmax(rowSums((x %*% v) * x), 0)
  )
}

diss_fit <- predict_cue_grid(diss_test, diss_fe, test_data, "diss_index")
conf_fit <- predict_cue_grid(conf_test, conf_fe, test_data, "conflict")

# Attach predictions, variances, and 95% normal-approximation bounds
test_data <- test_data %>%
  mutate(
    diss_pred = diss_fit$pred,
    diss_pred_var = diss_fit$var,
    diss_lower = diss_pred - (1.96 * sqrt(diss_pred_var)),
    diss_upper = diss_pred + (1.96 * sqrt(diss_pred_var)),
    conf_pred = conf_fit$pred,
    conf_pred_var = conf_fit$var,
    conf_lower = conf_pred - (1.96 * sqrt(conf_pred_var)),
    conf_upper = conf_pred + (1.96 * sqrt(conf_pred_var))
  )

# Step 3.4: Preparing Object for Plotting

# Helper: turn a column holding three distinct moderator values into an
# ordered low / average / high factor. Levels are assigned by rank rather
# than by `==` against hard-coded numbers, so the labels survive any change
# in the underlying data. `labels` must be given in low, average, high order.
label_three_levels <- function(x, labels) {
  factor(match(x, sort(unique(x))), levels = 1:3, labels = labels)
}

# Helper: the middle of the three distinct values, formatted for a label
middle_value <- function(x) {
  format(sort(unique(x))[2])
}

test_data <- test_data %>%
  mutate(
    # Round the moderators to three decimals for display
    diss_index = round(diss_index, digits = 3),
    conflict = round(conflict, digits = 3)) %>%
  mutate(
    dissimilarity = label_three_levels(
      diss_index,
      c("Low Dissimilarity\n(-2 SD)",
        paste0("Average Dissimilarity\n(", middle_value(diss_index), ")"),
        "High Dissimilarity\n(+2 SD)")
    ),
    group_conflict = label_three_levels(
      conflict,
      c("Low Socioeconomic Disparity\n(-2 SD)",
        paste0("Average Disparity\n(", middle_value(conflict), ")"),
        "High Socioeconomic Disparity\n(+2 SD)")
    )
  )

# Step 3.5: Dissimilarity Plot
# Predicted vote share against p_hisp_reg for the knownR_hisp == 1 scenario,
# one facet per dissimilarity level. The confidence ribbon is currently
# disabled. Line width is set with `linewidth`; `size` is deprecated for
# lines since ggplot2 3.4.0.
diss_plot <- ggplot(
  test_data %>% filter(knownR_hisp == 1),
  aes(x = p_hisp_reg, y = diss_pred,
      color = dissimilarity, fill = dissimilarity)
) +
  #geom_ribbon(aes(ymin = diss_lower, ymax = diss_upper), alpha = 0.15, color = NA) +
  geom_line(linewidth = 0.9) +
  facet_wrap(~ dissimilarity, nrow = 1, scales = "fixed") +
  guides(color = "none", fill = "none") +
  labs(x = "Share of Registered Voters Latino in Precinct", 
       y = "Predicted Latino Candidate(s)\nVote Share",
       subtitle = "Conditional Effect of Residential Segregation on Partisan Information",
       title = "Contests with Known Republican Latino Candidates") +
  scale_y_continuous(limits = c(0, 1)) +
  theme_minimal() +
  theme(panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.75),
        plot.subtitle = element_text(hjust = 0.5, size = 9),
        plot.title = element_text(hjust = 0.5, size = 11),
        axis.title = element_text(size = 8),
        axis.text = element_text(size = 7.5))

# Step 3.6: Conflict Plot
# Predicted vote share against p_hisp_reg for the knownR_hisp == 1 scenario,
# one facet per socioeconomic-disparity level. No title is set on this panel.
# The confidence ribbon is currently disabled. Line width is set with
# `linewidth`; `size` is deprecated for lines since ggplot2 3.4.0.
conf_plot <- ggplot(
  test_data %>% filter(knownR_hisp == 1),
  aes(x = p_hisp_reg, y = conf_pred,
      color = group_conflict, fill = group_conflict)
) +
  #geom_ribbon(aes(ymin = conf_lower, ymax = conf_upper), alpha = 0.15, color = NA) +
  geom_line(linewidth = 0.9) +
  facet_wrap(~ group_conflict, nrow = 1, scales = "fixed") +
  guides(color = "none", fill = "none") +
  labs(x = "Share of Registered Voters Latino in Precinct", 
       y = "Predicted Latino Candidate(s)\nVote Share",
       subtitle = "Conditional Effect of Socioeconomic Disparities on Partisan Information") +
  scale_y_continuous(limits = c(0, 1)) +
  theme_minimal() +
  theme(panel.border = element_rect(colour = "black", fill = NA, linewidth = 0.75),
        plot.subtitle = element_text(hjust = 0.5, size = 9),
        plot.title = element_text(hjust = 0.5, size = 11),
        axis.title = element_text(size = 8),
        axis.text = element_text(size = 7.5))

# Step 3.7: Patchwork Together
# Stack the dissimilarity panel above the conflict panel and add one shared
# caption beneath the combined figure.
combined_group <- diss_plot / conf_plot +
  plot_annotation(caption = "Predicted values from models presented in Table 6, using Tracy, CA 2010 FE")

print(combined_group)

# =========================================================================== #
#               CORRELATES OF PARTISAN DISCLOSURE (Table A.2)                 #
# =========================================================================== #

# --------------------------------------------------------------------------- #
# STEP 0: Load in Candidate Level Data                                        #
# --------------------------------------------------------------------------- #

# Candidate-level file, read from the working directory; used by every model
# in this section.
candidatelevel <- read_csv('candidatelevel.csv')

# --------------------------------------------------------------------------- #
# STEP 1: Logit Models on Correlates of Disclosure                            #
# --------------------------------------------------------------------------- #

# All four models share the same right-hand side and differ only in the
# outcome (revealed_D or revealed_R) and in the subsample (race equal to, or
# different from, "hispanic"). Year enters as a set of dummies via
# factor(year). family = "binomial" uses glm's default logit link.

# Step 1.1: Latino Democrat
# Outcome: revealed_D; sample: candidates with race == "hispanic".
lat_D <- glm(revealed_D ~ log(total_pop) + p_hisp_pop + p_dem_reg +
               gender + inc + 
               oth_revealed_D +
               oth_revealed_R +
               offcycle +
               margin_of_victory + 
               factor(year), 
             data = candidatelevel %>% filter(race == "hispanic"),
             family = "binomial")
summary(lat_D)

# Step 1.2: Latino Republican
# Outcome: revealed_R; sample: candidates with race == "hispanic".
lat_R <- glm(revealed_R ~ log(total_pop) + p_hisp_pop + p_dem_reg +
               gender + inc + 
               oth_revealed_D +
               oth_revealed_R +
               offcycle +
               margin_of_victory + 
               factor(year), 
             data = candidatelevel %>% filter(race == "hispanic"),
             family = "binomial")
summary(lat_R)

# Step 1.3: Non-Latino Democrat
# Outcome: revealed_D; sample: candidates with race != "hispanic"
# (rows with a missing race are dropped by filter()).
nonlat_D <- glm(revealed_D ~ log(total_pop) + p_hisp_pop + p_dem_reg +
                  gender + inc + 
                  oth_revealed_D +
                  oth_revealed_R +
                  offcycle +
                  margin_of_victory +
                  factor(year), 
                data = candidatelevel %>% filter(race != "hispanic"),
                family = "binomial")
summary(nonlat_D)

# Step 1.4: Non-Latino Republican
# Outcome: revealed_R; sample: candidates with race != "hispanic"
# (rows with a missing race are dropped by filter()).
nonlat_R <- glm(revealed_R ~ log(total_pop) + p_hisp_pop + p_dem_reg +
                  gender + inc + 
                  oth_revealed_D +
                  oth_revealed_R +
                  offcycle +
                  margin_of_victory +
                  factor(year), 
                data = candidatelevel %>% filter(race != "hispanic"),
                family = "binomial")
summary(nonlat_R)

# Step 1.5: All Candidates
# `revealed` is 1 when either revealed_D or revealed_R equals 1, 0 when
# neither does, and NA when that cannot be determined.
candidatelevel <- candidatelevel %>%
  mutate(revealed = as.numeric(revealed_D == 1 | revealed_R == 1))

# Same right-hand side as the four subgroup models, fit on every candidate.
allcands <- glm(
  revealed ~ log(total_pop) + p_hisp_pop + p_dem_reg +
    gender + inc +
    oth_revealed_D +
    oth_revealed_R +
    offcycle +
    margin_of_victory +
    factor(year),
  family = "binomial",
  data = candidatelevel
)

# --------------------------------------------------------------------------- #
# STEP 2: Put Models into Table                                               #
# --------------------------------------------------------------------------- #

# Display labels for the disclosure table, keyed by glm coefficient name.
# The year-dummy entries ("factor(year)2011" ... "factor(year)2020") are
# labelled with the bare year and generated rather than typed out.
coef_map <- local({
  covariate_labels <- c(
    "log(total_pop)" = "City Population (Logged)",
    "p_hisp_pop" = "Prop. Population Latino",
    "p_dem_reg" = "Prop. Democratic Registration",
    "genderF" = "Female",
    "incYes" = "Incumbent Mayor",
    "oth_revealed_D" = "Another Candidate Revealed D",
    "oth_revealed_R" = "Another Candidate Revealed R",
    "offcycle" = "Off-cycle Election",
    "margin_of_victory" = "Contest Winner MoV"
  )
  dummy_years <- as.character(2011:2020)
  year_labels <- setNames(dummy_years, paste0("factor(year)", dummy_years))
  c(covariate_labels, year_labels)
})

# LaTeX table with the five disclosure models side by side. Only coefficients
# named in coef_map are shown. `TRUE` rather than `T`: `T` is an ordinary
# variable that can be reassigned.
modelsummary(list("Latino Democrats" = lat_D,
                  "Latino Republicans" = lat_R,
                  "Non-Latino Democrats" = nonlat_D,
                  "Non-Latino Republicans" = nonlat_R,
                  "All Candidates" = allcands),
             coef_map = coef_map,
             stars = TRUE,
             output = 'latex')

